Make RssAgent emit events for new feed items in chronological order

RssAgent now sorts feed items by the date published and then by the last
updated time when creating events.

Previously it would create an event for each new feed item in order of
appearance, but since most feeds list items in reverse chronological
order, you would often end up seeing newer items processed earlier than
older items.

Akinori MUSHA 9 years ago
parent
commit
86ca2f6284
3 changed files with 7 additions and 4 deletions
  1. 1 0
      CHANGES.md
  2. 1 1
      app/models/agents/rss_agent.rb
  3. 5 3
      spec/models/agents/rss_agent_spec.rb

+ 1 - 0
CHANGES.md

@@ -1,5 +1,6 @@
1 1
 # Changes
2 2
 
3
+* Jun 17, 2015   - RssAgent emits events for new feed items in chronological order.
3 4
 * Jun 15, 2015   - Liquid filter `uri_expand` added.
4 5
 * Jun 12, 2015   - RSSAgent can now accept an array of URLs.
5 6
 * Jun 8, 2015    - WebsiteAgent includes a `use_namespaces` option to enable XML namespaces.

+ 1 - 1
app/models/agents/rss_agent.rb

@@ -77,7 +77,7 @@ module Agents
77 77
           feed = FeedNormalizer::FeedNormalizer.parse(response.body)
78 78
           feed.clean! if interpolated['clean'] == 'true'
79 79
           created_event_count = 0
80
-          feed.entries.each do |entry|
80
+          feed.entries.sort_by { |entry| [entry.date_published, entry.last_updated] }.each do |entry|
81 81
             entry_id = get_entry_id(entry)
82 82
             if check_and_track(entry_id)
83 83
               created_event_count += 1

+ 5 - 3
spec/models/agents/rss_agent_spec.rb

@@ -59,9 +59,11 @@ describe Agents::RssAgent do
59 59
         agent.check
60 60
       }.to change { agent.events.count }.by(20)
61 61
 
62
-      event = agent.events.last
63
-      expect(event.payload['url']).to eq("https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0")
64
-      expect(event.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"])
62
+      first, *, last = agent.events.last(20)
63
+      expect(first.payload['url']).to eq("https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0")
64
+      expect(first.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"])
65
+      expect(last.payload['url']).to eq("https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af")
66
+      expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"])
65 67
     end
66 68
 
67 69
     it "should track ids and not re-emit the same item when seen again" do